We will first import the dataset counties_fixed.csv
election <- vroom::vroom("data/counties_fixed.csv") %>%
clean_names() %>%
mutate(winner = case_when(
percentage20_donald_trump >= percentage20_joe_biden ~ "Trump",
percentage20_donald_trump < percentage20_joe_biden ~ "Biden"),
winner16 = case_when(
percentage16_donald_trump >= percentage16_hillary_clinton ~ "Trump",
percentage20_donald_trump < percentage16_hillary_clinton ~ "Clinton")
)
glimpse(election)
## Rows: 4,867
## Columns: 53
## $ x1 <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12…
## $ county <chr> "Abbeville", "Acadia", "Accomack", "Ada"…
## $ state <chr> "SC", "LA", "VA", "ID", "IA", "KY", "MO"…
## $ percentage16_donald_trump <dbl> 0.629, 0.773, 0.545, 0.479, 0.653, 0.806…
## $ percentage16_hillary_clinton <dbl> 0.346, 0.206, 0.428, 0.387, 0.300, 0.161…
## $ total_votes16 <dbl> 10724, 27386, 15755, 195587, 3759, 8231,…
## $ votes16_donald_trump <dbl> 6742, 21159, 8582, 93748, 2456, 6637, 60…
## $ votes16_hillary_clinton <dbl> 3712, 5638, 6737, 75676, 1127, 1323, 349…
## $ percentage20_donald_trump <dbl> 0.661, 0.795, 0.542, 0.504, 0.697, 0.830…
## $ percentage20_joe_biden <dbl> 0.330, 0.191, 0.447, 0.465, 0.286, 0.159…
## $ total_votes20 <dbl> 12433, 28425, 16938, 259389, 4183, 8766,…
## $ votes20_donald_trump <dbl> 8215, 22596, 9172, 130699, 2917, 7275, 6…
## $ votes20_joe_biden <dbl> 4101, 5443, 7578, 120539, 1197, 1391, 37…
## $ lat <dbl> 34.22333, 30.29506, 37.76707, 43.45266, …
## $ long <dbl> -82.46171, -92.41420, -75.63235, -116.24…
## $ cases <dbl> 805, 3182, 1227, 17451, 222, 517, 578, 8…
## $ deaths <dbl> 17, 102, 19, 181, 1, 22, 0, 11, 263, 1, …
## $ total_pop <dbl> 24788, 62607, 32840, 435117, 7192, 19304…
## $ men <dbl> 12044, 30433, 16079, 217999, 3552, 9632,…
## $ women <dbl> 12744, 32174, 16761, 217118, 3640, 9672,…
## $ hispanic <dbl> 1.3, 2.4, 8.8, 7.9, 1.7, 1.8, 2.3, 6.4, …
## $ white <dbl> 68.9, 77.5, 60.3, 85.2, 96.6, 93.4, 90.5…
## $ black <dbl> 27.6, 17.6, 28.3, 1.2, 0.3, 3.6, 2.4, 0.…
## $ native <dbl> 0.1, 0.1, 0.3, 0.4, 0.0, 0.1, 0.2, 41.7,…
## $ asian <dbl> 0.3, 0.1, 0.7, 2.6, 0.4, 0.1, 2.3, 0.6, …
## $ pacific <dbl> 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.2, …
## $ voting_age_citizen <dbl> 19452, 45197, 24408, 316189, 5572, 15280…
## $ income <dbl> 35254, 40492, 42260, 60151, 49477, 36575…
## $ income_err <dbl> 2259, 2544, 2253, 1294, 2633, 3426, 2130…
## $ income_per_cap <dbl> 19234, 21591, 24266, 31642, 28861, 18408…
## $ income_per_cap_err <dbl> 799, 1002, 1564, 725, 2055, 1010, 1702, …
## $ poverty <dbl> 22.7, 21.5, 19.8, 11.8, 9.5, 21.5, 26.2,…
## $ child_poverty <dbl> 32.1, 27.6, 31.8, 13.1, 12.1, 27.1, 20.7…
## $ professional <dbl> 27.2, 27.6, 31.1, 43.0, 28.2, 28.5, 36.8…
## $ service <dbl> 20.7, 16.9, 17.7, 16.6, 16.9, 15.9, 18.2…
## $ office <dbl> 20.8, 25.7, 18.8, 25.0, 20.0, 19.7, 24.1…
## $ construction <dbl> 10.6, 15.0, 15.1, 6.9, 17.3, 12.2, 9.4, …
## $ production <dbl> 20.7, 14.8, 17.3, 8.4, 17.6, 23.8, 11.5,…
## $ drive <dbl> 78.3, 83.2, 80.0, 80.7, 77.9, 84.5, 77.3…
## $ carpool <dbl> 11.1, 10.3, 10.6, 7.7, 12.4, 9.0, 12.1, …
## $ transit <dbl> 0.5, 0.2, 0.5, 0.5, 0.3, 0.0, 0.1, 0.1, …
## $ walk <dbl> 1.8, 1.6, 2.6, 1.5, 2.8, 2.6, 4.0, 2.8, …
## $ other_transp <dbl> 1.8, 2.2, 1.8, 2.8, 0.4, 0.5, 2.6, 1.0, …
## $ work_at_home <dbl> 6.5, 2.5, 4.5, 6.9, 6.2, 3.4, 4.0, 3.2, …
## $ mean_commute <dbl> 25.8, 27.6, 22.0, 20.4, 22.3, 22.2, 17.1…
## $ employed <dbl> 9505, 24982, 13837, 214984, 3680, 7988, …
## $ private_work <dbl> 78.8, 80.0, 74.6, 78.3, 73.8, 74.1, 73.6…
## $ public_work <dbl> 13.3, 12.1, 18.1, 15.0, 15.3, 15.8, 20.9…
## $ self_employed <dbl> 7.8, 7.6, 7.1, 6.6, 10.4, 9.9, 5.3, 7.5,…
## $ family_work <dbl> 0.1, 0.3, 0.2, 0.1, 0.5, 0.1, 0.2, 0.5, …
## $ unemployment <dbl> 9.4, 8.9, 5.4, 4.3, 3.0, 6.2, 5.5, 5.5, …
## $ winner <chr> "Trump", "Trump", "Trump", "Trump", "Tru…
## $ winner16 <chr> "Trump", "Trump", "Trump", "Trump", "Tru…
Now we want to import the dataset electoral_college.csv. Here we have information about how many electoral votes each state has. We will filter by 2020.
# Data set with Electoral votes from all the years
electoral_votes<- read_csv(here("data","electoral_college.csv")) %>% clean_names() %>%
filter(year==2020)
Let’s look for duplicates
#remove empty rows and columns
temp<-remove_empty(election, which = c("rows","cols"))
#there were no empty rows to be removed
rm(temp)
#check for duplicates
#we look for entries on the same day and for the same country
election%>%get_dupes(county, state)
## # A tibble: 0 x 54
## # … with 54 variables: county <chr>, state <chr>, dupe_count <int>, x1 <dbl>,
## # percentage16_donald_trump <dbl>, percentage16_hillary_clinton <dbl>,
## # total_votes16 <dbl>, votes16_donald_trump <dbl>,
## # votes16_hillary_clinton <dbl>, percentage20_donald_trump <dbl>,
## # percentage20_joe_biden <dbl>, total_votes20 <dbl>,
## # votes20_donald_trump <dbl>, votes20_joe_biden <dbl>, lat <dbl>, long <dbl>,
## # cases <dbl>, deaths <dbl>, total_pop <dbl>, men <dbl>, women <dbl>,
## # hispanic <dbl>, white <dbl>, black <dbl>, native <dbl>, asian <dbl>,
## # pacific <dbl>, voting_age_citizen <dbl>, income <dbl>, income_err <dbl>,
## # income_per_cap <dbl>, income_per_cap_err <dbl>, poverty <dbl>,
## # child_poverty <dbl>, professional <dbl>, service <dbl>, office <dbl>,
## # construction <dbl>, production <dbl>, drive <dbl>, carpool <dbl>,
## # transit <dbl>, walk <dbl>, other_transp <dbl>, work_at_home <dbl>,
## # mean_commute <dbl>, employed <dbl>, private_work <dbl>, public_work <dbl>,
## # self_employed <dbl>, family_work <dbl>, unemployment <dbl>, winner <chr>,
## # winner16 <chr>
electoral_votes %>% get_dupes(state,year)
## # A tibble: 0 x 4
## # … with 4 variables: state <chr>, year <dbl>, dupe_count <int>, votes <dbl>
Get POLYGONS from urbanmaps for counties in the US
counties_sf <- get_urbn_map("counties", sf = TRUE)
counties_sf <- counties_sf %>%
mutate(county_name2 = case_when(
grepl('County$', county_name) ~ str_sub(county_name, end = -8),
grepl('Parish$', county_name) ~ str_sub(county_name, end = -8),
TRUE ~ county_name)
)
Join with elections dataset.
data <- counties_sf %>%
left_join(election, by = c( "county_name2" = "county", "state_abbv" = "state"))
glimpse(data)
## Rows: 3,142
## Columns: 59
## $ county_fips <chr> "04015", "12035", "20129", "28093", "295…
## $ state_abbv <chr> "AZ", "FL", "KS", "MS", "MO", "NM", "NC"…
## $ state_fips <chr> "04", "12", "20", "28", "29", "35", "37"…
## $ county_name <chr> "Mohave County", "Flagler County", "Mort…
## $ fips_class <chr> "H1", "H1", "H1", "H1", "C7", "H1", "H1"…
## $ state_name <chr> "Arizona", "Florida", "Kansas", "Mississ…
## $ county_name2 <chr> "Mohave", "Flagler", "Morton", "Marshall…
## $ x1 <dbl> 1929, 921, 2001, 1799, 2639, 1844, 2937,…
## $ percentage16_donald_trump <dbl> 0.737, 0.589, 0.836, 0.444, 0.159, 0.233…
## $ percentage16_hillary_clinton <dbl> 0.222, 0.383, 0.125, 0.540, 0.797, 0.628…
## $ total_votes16 <dbl> 74189, 57413, 1160, 14698, 127403, 20959…
## $ votes16_donald_trump <dbl> 54656, 33804, 970, 6525, 20281, 4893, 25…
## $ votes16_hillary_clinton <dbl> 16485, 21985, 145, 7944, 101487, 13162, …
## $ percentage20_donald_trump <dbl> 0.750, 0.599, 0.863, 0.505, 0.161, 0.295…
## $ percentage20_joe_biden <dbl> 0.237, 0.392, 0.126, 0.481, 0.823, 0.680…
## $ total_votes20 <dbl> 104667, 71846, 1163, 13064, 131765, 2640…
## $ votes20_donald_trump <dbl> 78534, 43039, 1004, 6591, 21185, 7782, 2…
## $ votes20_joe_biden <dbl> 24831, 28148, 147, 6283, 108385, 17969, …
## $ lat <dbl> 35.70472, 29.45934, 37.19141, 34.76216, …
## $ long <dbl> -113.75779, -81.31509, -101.79925, -89.5…
## $ cases <dbl> 4453, 2181, 56, 1548, NA, 4752, 264, 86,…
## $ deaths <dbl> 234, 39, 2, 32, NA, 262, 8, 1, NA, 0, 30…
## $ total_pop <dbl> 204691, 105015, 2931, 35981, 314867, 728…
## $ men <dbl> 103175, 50436, 1534, 17793, 152224, 3520…
## $ women <dbl> 101516, 54579, 1397, 18188, 162643, 3764…
## $ hispanic <dbl> 15.9, 10.0, 22.6, 3.5, 3.9, 14.1, 5.0, 1…
## $ white <dbl> 78.0, 74.7, 72.4, 47.7, 42.9, 9.1, 44.9,…
## $ black <dbl> 1.0, 10.0, 0.8, 47.6, 47.5, 0.5, 48.2, 0…
## $ native <dbl> 2.1, 0.3, 0.0, 0.1, 0.2, 73.3, 0.1, 2.8,…
## $ asian <dbl> 1.2, 2.5, 0.1, 0.1, 3.1, 0.9, 0.2, 0.0, …
## $ pacific <dbl> 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ voting_age_citizen <dbl> 160544, 83353, 1978, 27763, 241414, 5054…
## $ income <dbl> 41567, 51049, 43813, 41134, 38664, 30336…
## $ income_err <dbl> 796, 1256, 9676, 2932, 799, 1354, 3147, …
## $ income_per_cap <dbl> 23527, 25741, 23038, 19775, 26739, 14077…
## $ income_per_cap_err <dbl> 630, 654, 1831, 1047, 519, 634, 2590, 33…
## $ poverty <dbl> 18.6, 13.3, 8.0, 17.8, 25.0, 37.5, 24.1,…
## $ child_poverty <dbl> 27.2, 21.5, 7.9, 26.6, 39.8, 45.7, 46.6,…
## $ professional <dbl> 24.3, 31.6, 29.5, 23.2, 40.6, 29.4, 15.8…
## $ service <dbl> 25.4, 19.3, 15.5, 14.3, 22.5, 25.1, 23.0…
## $ office <dbl> 26.8, 31.9, 17.3, 24.9, 21.7, 22.0, 23.6…
## $ construction <dbl> 10.9, 9.4, 32.5, 12.6, 5.0, 9.7, 12.3, 2…
## $ production <dbl> 12.7, 7.8, 5.3, 25.1, 10.2, 13.8, 25.3, …
## $ drive <dbl> 79.9, 83.5, 71.3, 86.1, 71.8, 74.8, 81.2…
## $ carpool <dbl> 12.6, 6.6, 13.6, 5.4, 8.2, 9.7, 13.1, 10…
## $ transit <dbl> 0.9, 0.3, 0.0, 0.4, 9.5, 0.6, 0.0, 0.0, …
## $ walk <dbl> 1.5, 0.3, 3.9, 1.4, 4.4, 3.7, 1.6, 8.0, …
## $ other_transp <dbl> 2.1, 1.7, 2.0, 0.8, 1.9, 1.6, 2.3, 0.6, …
## $ work_at_home <dbl> 3.1, 7.7, 9.3, 6.0, 4.2, 9.5, 1.8, 7.8, …
## $ mean_commute <dbl> 20.4, 25.7, 17.7, 28.2, 24.1, 22.1, 25.1…
## $ employed <dbl> 69774, 39278, 1275, 14304, 152079, 23207…
## $ private_work <dbl> 78.6, 81.5, 69.6, 80.3, 83.9, 56.9, 78.4…
## $ public_work <dbl> 14.1, 11.8, 20.5, 11.9, 11.8, 35.7, 15.0…
## $ self_employed <dbl> 7.1, 6.6, 9.8, 7.7, 4.0, 7.4, 6.6, 11.0,…
## $ family_work <dbl> 0.2, 0.1, 0.0, 0.1, 0.2, 0.0, 0.0, 0.4, …
## $ unemployment <dbl> 10.1, 6.5, 8.7, 6.2, 9.4, 16.1, 10.4, 2.…
## $ winner <chr> "Trump", "Trump", "Trump", "Trump", "Bid…
## $ winner16 <chr> "Trump", "Trump", "Trump", "Clinton", "C…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((-1321573 -8…
View datasets summary.
# Summary
summary(election)
## x1 county state percentage16_donald_trump
## Min. : 0 Length:4867 Length:4867 Min. :0.0410
## 1st Qu.:1216 Class :character Class :character 1st Qu.:0.5500
## Median :2433 Mode :character Mode :character Median :0.6670
## Mean :2435 Mean :0.6362
## 3rd Qu.:3650 3rd Qu.:0.7505
## Max. :4954 Max. :0.9530
## NA's :1756
## percentage16_hillary_clinton total_votes16 votes16_donald_trump
## Min. :0.0310 Min. : 64 Min. : 57
## 1st Qu.:0.2045 1st Qu.: 4824 1st Qu.: 3207
## Median :0.2850 Median : 10935 Median : 7117
## Mean :0.3168 Mean : 40916 Mean : 19350
## 3rd Qu.:0.3990 3rd Qu.: 28675 3rd Qu.: 17396
## Max. :0.9280 Max. :2314275 Max. :590465
## NA's :1756 NA's :1756 NA's :1756
## votes16_hillary_clinton percentage20_donald_trump percentage20_joe_biden
## Min. : 4 Min. :0.0000 Min. :0.0310
## 1st Qu.: 1164 1st Qu.:0.4540 1st Qu.:0.2470
## Median : 3140 Median :0.6020 Median :0.3760
## Mean : 19566 Mean :0.5847 Mean :0.3951
## 3rd Qu.: 9536 3rd Qu.:0.7340 3rd Qu.:0.5240
## Max. :1654626 Max. :0.9620 Max. :1.0000
## NA's :1756 NA's :318 NA's :318
## total_votes20 votes20_donald_trump votes20_joe_biden lat
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 2322 1st Qu.: 1233 1st Qu.: 740 1st Qu.:34.27
## Median : 7481 Median : 4348 Median : 2417 Median :38.17
## Mean : 33162 Mean : 15691 Mean : 16876 Mean :37.25
## 3rd Qu.: 20072 3rd Qu.: 12278 3rd Qu.: 7366 3rd Qu.:41.68
## Max. :4139895 Max. :1107090 Max. :2947568 Max. :69.31
## NA's :230 NA's :230 NA's :230 NA's :1615
## long cases deaths total_pop
## Min. :-174.16 Min. : 0.0 Min. : 0.00 Min. : 74
## 1st Qu.: -98.07 1st Qu.: 216.8 1st Qu.: 2.00 1st Qu.: 10945
## Median : -89.92 Median : 614.0 Median : 10.00 Median : 25692
## Mean : -89.42 Mean : 2808.8 Mean : 70.75 Mean : 102166
## 3rd Qu.: -82.84 3rd Qu.: 1758.0 3rd Qu.: 35.00 3rd Qu.: 67445
## Max. : 0.00 Max. :309190.0 Max. :7404.00 Max. :10105722
## NA's :1615 NA's :1615 NA's :1615 NA's :1725
## men women hispanic white
## Min. : 39 Min. : 35 Min. : 0.000 Min. : 0.60
## 1st Qu.: 5514 1st Qu.: 5460 1st Qu.: 2.100 1st Qu.: 65.10
## Median : 12798 Median : 12885 Median : 4.000 Median : 84.20
## Mean : 50292 Mean : 51873 Mean : 9.121 Mean : 76.76
## 3rd Qu.: 33481 3rd Qu.: 34108 3rd Qu.: 9.300 3rd Qu.: 92.90
## Max. :4979641 Max. :5126081 Max. :99.200 Max. :100.00
## NA's :1725 NA's :1725 NA's :1725 NA's :1725
## black native asian pacific
## Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 0.600 1st Qu.: 0.100 1st Qu.: 0.30 1st Qu.: 0.0000
## Median : 2.100 Median : 0.300 Median : 0.60 Median : 0.0000
## Mean : 8.896 Mean : 1.812 Mean : 1.32 Mean : 0.0855
## 3rd Qu.: 9.875 3rd Qu.: 0.600 3rd Qu.: 1.20 3rd Qu.: 0.1000
## Max. :86.900 Max. :90.300 Max. :41.80 Max. :33.7000
## NA's :1725 NA's :1725 NA's :1725 NA's :1725
## voting_age_citizen income income_err income_per_cap
## Min. : 59 Min. : 19264 Min. : 262 Min. : 9334
## 1st Qu.: 8279 1st Qu.: 41123 1st Qu.: 1762 1st Qu.:21810
## Median : 19480 Median : 48066 Median : 2619 Median :25272
## Mean : 72223 Mean : 49754 Mean : 3176 Mean :26040
## 3rd Qu.: 51224 3rd Qu.: 55764 3rd Qu.: 3834 3rd Qu.:29126
## Max. :6218279 Max. :129588 Max. :41001 Max. :69529
## NA's :1725 NA's :1725 NA's :1725 NA's :1725
## income_per_cap_err poverty child_poverty professional
## Min. : 129 Min. : 2.40 Min. : 0.00 Min. :11.40
## 1st Qu.: 849 1st Qu.:11.30 1st Qu.:14.80 1st Qu.:27.30
## Median : 1240 Median :15.20 Median :21.20 Median :30.50
## Mean : 1532 Mean :15.99 Mean :22.11 Mean :31.54
## 3rd Qu.: 1826 3rd Qu.:19.40 3rd Qu.:27.80 3rd Qu.:34.90
## Max. :16145 Max. :52.00 Max. :76.50 Max. :69.00
## NA's :1725 NA's :1725 NA's :1726 NA's :1725
## service office construction production
## Min. : 0.00 Min. : 4.80 Min. : 0.00 Min. : 0.00
## 1st Qu.:15.70 1st Qu.:19.90 1st Qu.: 9.80 1st Qu.:11.60
## Median :17.80 Median :22.00 Median :12.20 Median :15.50
## Mean :18.12 Mean :21.78 Mean :12.64 Mean :15.93
## 3rd Qu.:20.07 3rd Qu.:23.80 3rd Qu.:14.90 3rd Qu.:19.60
## Max. :46.40 Max. :37.20 Max. :36.40 Max. :48.70
## NA's :1725 NA's :1725 NA's :1725 NA's :1725
## drive carpool transit walk
## Min. : 4.60 Min. : 0.000 Min. : 0.0000 Min. : 0.000
## 1st Qu.:77.20 1st Qu.: 8.100 1st Qu.: 0.1000 1st Qu.: 1.400
## Median :81.00 Median : 9.500 Median : 0.3000 Median : 2.300
## Mean :79.52 Mean : 9.899 Mean : 0.9368 Mean : 3.236
## 3rd Qu.:84.00 3rd Qu.:11.300 3rd Qu.: 0.8000 3rd Qu.: 3.800
## Max. :97.20 Max. :29.300 Max. :61.8000 Max. :59.200
## NA's :1725 NA's :1725 NA's :1725 NA's :1725
## other_transp work_at_home mean_commute employed
## Min. : 0.000 Min. : 0.000 Min. : 5.10 Min. : 39
## 1st Qu.: 0.900 1st Qu.: 2.900 1st Qu.:19.60 1st Qu.: 4550
## Median : 1.300 Median : 4.100 Median :23.10 Median : 10695
## Mean : 1.603 Mean : 4.803 Mean :23.35 Mean : 47931
## 3rd Qu.: 1.900 3rd Qu.: 5.800 3rd Qu.:26.90 3rd Qu.: 29515
## Max. :43.200 Max. :33.000 Max. :45.10 Max. :4805817
## NA's :1725 NA's :1725 NA's :1725 NA's :1725
## private_work public_work self_employed family_work
## Min. :31.10 Min. : 4.40 Min. : 0.000 Min. :0.0000
## 1st Qu.:71.70 1st Qu.:12.70 1st Qu.: 5.200 1st Qu.:0.1000
## Median :76.30 Median :15.70 Median : 6.800 Median :0.2000
## Mean :75.07 Mean :16.89 Mean : 7.758 Mean :0.2824
## 3rd Qu.:80.30 3rd Qu.:19.50 3rd Qu.: 9.175 3rd Qu.:0.3000
## Max. :88.80 Max. :64.80 Max. :38.000 Max. :8.0000
## NA's :1725 NA's :1725 NA's :1725 NA's :1725
## unemployment winner winner16
## Min. : 0.000 Length:4867 Length:4867
## 1st Qu.: 4.400 Class :character Class :character
## Median : 6.100 Mode :character Mode :character
## Mean : 6.364
## 3rd Qu.: 7.800
## Max. :28.800
## NA's :1725
summary(data)
## county_fips state_abbv state_fips county_name
## Length:3142 Length:3142 Length:3142 Length:3142
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## fips_class state_name county_name2 x1
## Length:3142 Length:3142 Length:3142 Min. : 0
## Class :character Class :character Class :character 1st Qu.: 785
## Mode :character Mode :character Mode :character Median :1570
## Mean :1587
## 3rd Qu.:2355
## Max. :4953
## NA's :1
## percentage16_donald_trump percentage16_hillary_clinton total_votes16
## Min. :0.0410 Min. :0.0310 Min. : 64
## 1st Qu.:0.5500 1st Qu.:0.2045 1st Qu.: 4824
## Median :0.6670 Median :0.2850 Median : 10935
## Mean :0.6362 Mean :0.3168 Mean : 40916
## 3rd Qu.:0.7505 3rd Qu.:0.3990 3rd Qu.: 28675
## Max. :0.9530 Max. :0.9280 Max. :2314275
## NA's :31 NA's :31 NA's :31
## votes16_donald_trump votes16_hillary_clinton percentage20_donald_trump
## Min. : 57 Min. : 4 Min. :0.0400
## 1st Qu.: 3207 1st Qu.: 1164 1st Qu.:0.5620
## Median : 7117 Median : 3140 Median :0.6840
## Mean : 19350 Mean : 19566 Mean :0.6516
## 3rd Qu.: 17396 3rd Qu.: 9536 3rd Qu.:0.7750
## Max. :590465 Max. :1654626 Max. :0.9620
## NA's :31 NA's :31 NA's :27
## percentage20_joe_biden total_votes20 votes20_donald_trump
## Min. :0.0310 Min. : 0 Min. : 0
## 1st Qu.:0.2090 1st Qu.: 5121 1st Qu.: 3499
## Median :0.2990 Median : 11899 Median : 7952
## Mean :0.3309 Mean : 46856 Mean : 22480
## 3rd Qu.:0.4190 3rd Qu.: 31072 3rd Qu.: 19511
## Max. :0.9400 Max. :4139895 Max. :1107090
## NA's :27 NA's :52 NA's :52
## votes20_joe_biden lat long cases
## Min. : 0 Min. :19.60 Min. :-159.60 Min. : 0
## 1st Qu.: 1242 1st Qu.:34.61 1st Qu.: -98.18 1st Qu.: 240
## Median : 3480 Median :38.36 Median : -90.42 Median : 654
## Mean : 23545 Mean :38.26 Mean : -91.94 Mean : 2909
## 3rd Qu.: 10814 3rd Qu.:41.73 3rd Qu.: -83.62 3rd Qu.: 1795
## Max. :2947568 Max. :48.82 Max. : -67.63 Max. :309190
## NA's :52 NA's :70 NA's :70 NA's :70
## deaths total_pop men women
## Min. : 0.00 Min. : 74 Min. : 39 Min. : 35
## 1st Qu.: 3.00 1st Qu.: 10948 1st Qu.: 5516 1st Qu.: 5462
## Median : 11.00 Median : 25694 Median : 12804 Median : 12887
## Mean : 73.77 Mean : 102189 Mean : 50304 Mean : 51885
## 3rd Qu.: 36.00 3rd Qu.: 67413 3rd Qu.: 33480 3rd Qu.: 34102
## Max. :7404.00 Max. :10105722 Max. :4979641 Max. :5126081
## NA's :70 NA's :3 NA's :3 NA's :3
## hispanic white black native
## Min. : 0.000 Min. : 0.60 Min. : 0.00 Min. : 0.000
## 1st Qu.: 2.050 1st Qu.: 65.10 1st Qu.: 0.60 1st Qu.: 0.100
## Median : 4.000 Median : 84.20 Median : 2.10 Median : 0.300
## Mean : 9.104 Mean : 76.78 Mean : 8.90 Mean : 1.811
## 3rd Qu.: 9.300 3rd Qu.: 92.95 3rd Qu.: 9.85 3rd Qu.: 0.600
## Max. :99.200 Max. :100.00 Max. :86.90 Max. :90.300
## NA's :3 NA's :3 NA's :3 NA's :3
## asian pacific voting_age_citizen income
## Min. : 0.000 Min. : 0.00000 Min. : 59 Min. : 19264
## 1st Qu.: 0.300 1st Qu.: 0.00000 1st Qu.: 8280 1st Qu.: 41126
## Median : 0.600 Median : 0.00000 Median : 19506 Median : 48072
## Mean : 1.319 Mean : 0.08515 Mean : 72244 Mean : 49758
## 3rd Qu.: 1.200 3rd Qu.: 0.10000 3rd Qu.: 51210 3rd Qu.: 55763
## Max. :41.800 Max. :33.70000 Max. :6218279 Max. :129588
## NA's :3 NA's :3 NA's :3 NA's :3
## income_err income_per_cap income_per_cap_err poverty
## Min. : 262 Min. : 9334 Min. : 129 Min. : 2.40
## 1st Qu.: 1762 1st Qu.:21824 1st Qu.: 849 1st Qu.:11.30
## Median : 2619 Median :25273 Median : 1239 Median :15.20
## Mean : 3177 Mean :26041 Mean : 1532 Mean :15.98
## 3rd Qu.: 3836 3rd Qu.:29124 3rd Qu.: 1826 3rd Qu.:19.40
## Max. :41001 Max. :69529 Max. :16145 Max. :52.00
## NA's :3 NA's :3 NA's :3 NA's :3
## child_poverty professional service office construction
## Min. : 0.0 Min. :11.40 Min. : 0.00 Min. : 4.80 Min. : 0.00
## 1st Qu.:14.8 1st Qu.:27.30 1st Qu.:15.70 1st Qu.:19.90 1st Qu.: 9.80
## Median :21.2 Median :30.50 Median :17.80 Median :22.00 Median :12.20
## Mean :22.1 Mean :31.54 Mean :18.12 Mean :21.78 Mean :12.63
## 3rd Qu.:27.8 3rd Qu.:34.90 3rd Qu.:20.05 3rd Qu.:23.80 3rd Qu.:14.90
## Max. :76.5 Max. :69.00 Max. :46.40 Max. :37.20 Max. :36.40
## NA's :4 NA's :3 NA's :3 NA's :3 NA's :3
## production drive carpool transit
## Min. : 0.00 Min. : 4.60 Min. : 0.000 Min. : 0.0000
## 1st Qu.:11.60 1st Qu.:77.20 1st Qu.: 8.100 1st Qu.: 0.1000
## Median :15.50 Median :81.00 Median : 9.500 Median : 0.3000
## Mean :15.93 Mean :79.53 Mean : 9.895 Mean : 0.9369
## 3rd Qu.:19.60 3rd Qu.:84.00 3rd Qu.:11.300 3rd Qu.: 0.8000
## Max. :48.70 Max. :97.20 Max. :29.300 Max. :61.8000
## NA's :3 NA's :3 NA's :3 NA's :3
## walk other_transp work_at_home mean_commute
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 5.10
## 1st Qu.: 1.400 1st Qu.: 0.900 1st Qu.: 2.900 1st Qu.:19.60
## Median : 2.300 Median : 1.300 Median : 4.100 Median :23.10
## Mean : 3.231 Mean : 1.602 Mean : 4.804 Mean :23.35
## 3rd Qu.: 3.800 3rd Qu.: 1.900 3rd Qu.: 5.800 3rd Qu.:26.90
## Max. :59.200 Max. :43.200 Max. :33.000 Max. :45.10
## NA's :3 NA's :3 NA's :3 NA's :3
## employed private_work public_work self_employed
## Min. : 39 Min. :31.10 Min. : 4.40 Min. : 0.000
## 1st Qu.: 4551 1st Qu.:71.70 1st Qu.:12.70 1st Qu.: 5.200
## Median : 10697 Median :76.30 Median :15.70 Median : 6.800
## Mean : 47946 Mean :75.07 Mean :16.89 Mean : 7.758
## 3rd Qu.: 29488 3rd Qu.:80.30 3rd Qu.:19.50 3rd Qu.: 9.150
## Max. :4805817 Max. :88.80 Max. :64.80 Max. :38.000
## NA's :3 NA's :3 NA's :3 NA's :3
## family_work unemployment winner winner16
## Min. :0.0000 Min. : 0.000 Length:3142 Length:3142
## 1st Qu.:0.1000 1st Qu.: 4.400 Class :character Class :character
## Median :0.2000 Median : 6.100 Mode :character Mode :character
## Mean :0.2826 Mean : 6.364
## 3rd Qu.:0.3000 3rd Qu.: 7.800
## Max. :8.0000 Max. :28.800
## NA's :3 NA's :3
## geometry
## MULTIPOLYGON :3142
## epsg:2163 : 0
## +proj=laea...: 0
##
##
##
##
Now we will create a new dataset with the STATE POLYGONS from urbanmap and the data from electoral_votes. We will add new data such as:
winner_by: who won the state in 2020winner_by16: who won the state in 2016was_flipped: whether the state was flipped in 2020We will use this data to produce maps and analyze the underlying trends on them. The data included was taken from this article
# Identified swing states
swing_states <- c("Arizona","Texas","Florida","Georgia","Pennsylvania","Ohio","Wisconsin","North Carolina","South Carolina","Iowa","Nevada","Michigan")
# Swing states Won by Trump
trump_won_swing<- c("Texas","Florida","Ohio","North Carolina","South Carolina","Iowa")
# Swing states won by Biden
biden_won_swing<- c("Arizona","Georgia","Pennsylvania","Wisconsin","Nevada","Michigan")
# Swing states won by Trump in 2016
trump_won_swing16<-c("Arizona","Texas","Florida","Georgia","Pennsylvania","Ohio","Wisconsin","North Carolina","South Carolina","Iowa","Michigan")
# Swing states won by Hillary in 2016
hillary_won_swing16<-c("Nevada")
# Swing states flipped by Biden in 2020
flipped<-c("Arizona","Georgia","Pennsylvania","Wisconsin","Michigan")
electoral_mapping<-
get_urbn_map("states", sf=TRUE) %>%
left_join(electoral_votes %>% select(!year),
by =c("state_name" = "state")
) %>%
# Create variable winner_by
mutate(winner_by = ifelse(state_name %in% trump_won_swing,
"Trump",
ifelse(state_name %in% biden_won_swing,
"Biden", NA)),
# Create variable winner_by 16
winner_by16 = ifelse(state_name %in% trump_won_swing16, "Trump",
ifelse(state_name %in% hillary_won_swing16, "Hillary", NA)),
# Create variable was flipped
was_flipped = ifelse(state_name %in% flipped, TRUE, FALSE),
# Create variable colour
colour = ifelse(state_name %in% trump_won_swing, "red",
ifelse(state_name %in% hillary_won_swing16, "blue",
ifelse(state_name %in% flipped, "flipped",NA))))
Battle Trump against Biden
party_colours <- c("Biden" = "#2E74C0", "Trump"= "#CB454A")
election %>%
select(votes20_donald_trump,votes20_joe_biden) %>%
drop_na() %>%
summarise(Biden = sum(votes20_joe_biden),
Trump = sum(votes20_donald_trump)) %>%
ggplot() +
geom_col(aes(x = 1, y = 538, fill = "Trump"), width = 1) +
geom_col(aes(x = 1, y = 306, fill = "Biden"), width = 1) +
scale_fill_manual(values = party_colours) +
labs(title = "Biden triumphs by 6M votes",
subtitle = "2020 election vote counts",
fill = NULL) +
theme_void() +
theme(legend.position = "top",
plot.title = element_text(family = "Courier", face = "bold", size = 20),
plot.subtitle = element_text(family = "Courier", size = 12),
legend.text=element_text(family="Courier"),
legend.title=element_text(family="Courier"))+
xlim(c(0,2)) +
coord_flip() +
geom_linerangeh(aes(y = 270, xmin = 0.5, xmax = 1.75),
size = 0.7,
color = "black") +
annotate("text", x=1.82, y=270, label="270 electors", size = 4,color = "black",family = "Courier") +
# annotate("text", x=1.6, y=70, label="306 electors", size = 8,color = "#2E74C0") +
# annotate("text", x=1.6, y=470, label="232 electors", size = 8,color = "#CB454A") +
annotate("text", x=1, y=35, label="306", size = 10,color = "white", fontface = "bold", family = "Courier") +
annotate("text", x=1, y=505, label="232", size = 10,color = "white", fontface = "bold",family = "Courier") +
annotate("text", x=0.4, y=70, label="78M votes (52%)", size = 5,color = "#2E74C0",family = "Courier") +
annotate("text", x=0.4, y=465, label="72M votes (48%)", size = 5,color = "#CB454A",family = "Courier") +
theme(plot.title = element_text(face = "bold"))
Representation of counties
party_colours <- c("Biden" = "#2E74C0", "Trump"= "#CB454A")
p1 <- election %>%
select(votes20_donald_trump,votes20_joe_biden) %>%
drop_na() %>%
summarise(Biden = sum(votes20_joe_biden),
Trump = sum(votes20_donald_trump)) %>%
ggplot() +
geom_col(aes(x = 0.76, y = Biden + Trump, fill = "Trump"), width = 1.5) +
geom_col(aes(x = 0.76, y = Biden, fill = "Biden"), width = 1.5) +
scale_fill_manual(values = party_colours) +
labs(title = "Counties do not display people",
subtitle = "2020 election vote counts",
fill = NULL) +
theme_void() +
theme(plot.title = element_text(family = "Courier", face = "bold", size = 20),
plot.subtitle = element_text(family = "Courier", size = 12),
legend.position = "top",
text = element_text(family = "Courier"))+
xlim(c(0,2)) +
coord_flip() +
# geom_linerangeh(aes(y = 0.5*(Biden+Trump), xmin = 0.5, xmax = 1.75),
# size = 0.5,
# color = "black") +
annotate("text", x=0.76, y=12000000, label="78M", size = 10,color = "white", fontface = "bold",family = "Courier") +
annotate("text", x=0.76, y=137500000, label="72M", size = 10,color = "white", fontface = "bold",family = "Courier") +
annotate("text", x=1.8, y=18000000, label="By vote count:", size = 4.5,color = "black", fontface = "bold",family = "Courier") +
theme(plot.title = element_text(face = "bold"))
party_colours <- c("Biden" = "#2E74C0", "Trump"= "#CB454A")
summarised_el <- election %>%
select(votes20_donald_trump,votes20_joe_biden) %>%
drop_na() %>%
summarise(Biden = sum(votes20_joe_biden),
Trump = sum(votes20_donald_trump))
p2 <- data %>%
# select(votes20_donald_trump,votes20_joe_biden) %>%
# drop_na() %>%
# pivot_longer(cols = Biden:Trump, names_to = ("candidate")) %>%
ggplot() +
geom_bar(aes(x = 0.76, fill = winner), position = position_stack(reverse = T), width = 1.5) +
# geom_col(`dataaes(x = 1, y = Biden + Trump, fill = "Trump"), width = 1)
# geom_bar(aes(x = 1, y = Biden + Trump, fill = "Trump")) +
# geom_col(aes(x = 1, y = Biden, fill = "Biden")) +
scale_fill_manual(values = party_colours, na.translate = F) +
# labs(title = "Trump gets majory of counties dispite loss",
# subtitle = "2020 election vote counts by counties",
# fill = NULL) +
theme_void() +
theme(plot.title = element_text(face = 'bold'),
legend.position = "top",
text = element_text(family = "Courier"))+
xlim(c(0,2)) + ylim(c(0,3115)) +
coord_flip() +
annotate("text", x=0.76, y=210, label="515", size = 10,color = "white", fontface = "bold",family = "Courier") +
annotate("text", x=0.76, y=2800, label="2600", size = 10,color = "white", fontface = "bold",family = "Courier") +
annotate("text", x=1.8, y=320, label="By counties:", size = 4.5,color = "black", fontface = "bold",family = "Courier") +
guides(fill = FALSE)
library(patchwork)
p1/p2
Votes in 2016
# electoral votes battle
party_colours <- c("Clinton" = "#2E74C0", "Trump"= "#CB454A")
election %>%
select(votes20_donald_trump,votes20_joe_biden) %>%
drop_na() %>%
summarise(Biden = sum(votes20_joe_biden),
Trump = sum(votes20_donald_trump)) %>%
ggplot() +
geom_col(aes(x = 1, y = 538, fill = "Trump"), width = 1) +
geom_col(aes(x = 1, y = 232, fill = "Clinton"), width = 1) +
scale_fill_manual(values = party_colours) +
labs(title = "Trump won by electoral vote difference",
subtitle = "2016 Election Results",
fill = NULL) +
theme_void() +
theme(legend.position = "top",
plot.title = element_text(family = "Courier", face = "bold", size = 20),
plot.subtitle = element_text(family = "Courier", size = 12),
legend.text=element_text(family="Courier"),
legend.title=element_text(family="Courier"))+
xlim(c(0,2)) +
coord_flip() +
geom_linerangeh(aes(y = 270, xmin = 0.5, xmax = 1.75),
size = 0.7,
color = "black") +
annotate("text", x=1.82, y=270, label="270 electors", size = 4,color = "black",family = "Courier") +
# annotate("text", x=1.6, y=70, label="306 electors", size = 8,color = "#2E74C0") +
# annotate("text", x=1.6, y=470, label="232 electors", size = 8,color = "#CB454A") +
annotate("text", x=1, y=35, label="232", size = 10,color = "white", fontface = "bold", family = "Courier") +
annotate("text", x=1, y=505, label="306", size = 10,color = "white", fontface = "bold",family = "Courier") +
annotate("text", x=0.4, y=70, label="66M votes (48%)", size = 5,color = "#2E74C0",family = "Courier") +
annotate("text", x=0.4, y=465, label="63M votes (46%)", size = 5,color = "#CB454A",family = "Courier") +
theme(plot.title = element_text(face = "bold"))
Votes by Counties
data %>%
ggplot(aes()) +
geom_sf(aes(fill = winner, colour = winner)) +
scale_fill_manual(values = c("#2E74C0","#CB454A"), na.translate = F) +
scale_colour_manual(values = c("#2E74C0","#CB454A"), na.translate = F) +
coord_sf(datum = NA) +
theme_void() +
theme(plot.title = element_text(face = "bold"),
legend.position = "top",
text = element_text(family = "Courier")) +
labs(title = "US turns red despite Trump loss",
subtitle = "US county map colored by winner",
fill = NULL) +
guides(color= FALSE)
Now we will create circles to visualize this by population
circles_data <- data
st_geometry(circles_data) <- NULL
circles_data_sf <- circles_data %>%
drop_na(long, lat) %>%
filter(state_abbv != "HI") %>%
mutate(long = long,
lat = lat) %>%
st_as_sf(coords = c('long', 'lat'),
crs = 4326)
glimpse(circles_data_sf)
## Rows: 3,067
## Columns: 57
## $ county_fips <chr> "04015", "12035", "20129", "28093", "350…
## $ state_abbv <chr> "AZ", "FL", "KS", "MS", "NM", "NC", "ND"…
## $ state_fips <chr> "04", "12", "20", "28", "35", "37", "38"…
## $ county_name <chr> "Mohave County", "Flagler County", "Mort…
## $ fips_class <chr> "H1", "H1", "H1", "H1", "H1", "H1", "H1"…
## $ state_name <chr> "Arizona", "Florida", "Kansas", "Mississ…
## $ county_name2 <chr> "Mohave", "Flagler", "Morton", "Marshall…
## $ x1 <dbl> 1929, 921, 2001, 1799, 1844, 2937, 310, …
## $ percentage16_donald_trump <dbl> 0.737, 0.589, 0.836, 0.444, 0.233, 0.419…
## $ percentage16_hillary_clinton <dbl> 0.222, 0.383, 0.125, 0.540, 0.628, 0.571…
## $ total_votes16 <dbl> 74189, 57413, 1160, 14698, 20959, 6115, …
## $ votes16_donald_trump <dbl> 54656, 33804, 970, 6525, 4893, 2560, 879…
## $ votes16_hillary_clinton <dbl> 16485, 21985, 145, 7944, 13162, 3490, 11…
## $ percentage20_donald_trump <dbl> 0.750, 0.599, 0.863, 0.505, 0.295, 0.449…
## $ percentage20_joe_biden <dbl> 0.237, 0.392, 0.126, 0.481, 0.680, 0.547…
## $ total_votes20 <dbl> 104667, 71846, 1163, 13064, 26406, 6191,…
## $ votes20_donald_trump <dbl> 78534, 43039, 1004, 6591, 7782, 2778, 98…
## $ votes20_joe_biden <dbl> 24831, 28148, 147, 6283, 17969, 3387, 13…
## $ cases <dbl> 4453, 2181, 56, 1548, 4752, 264, 86, 68,…
## $ deaths <dbl> 234, 39, 2, 32, 262, 8, 1, 0, 30, 52, 6,…
## $ total_pop <dbl> 204691, 105015, 2931, 35981, 72849, 1233…
## $ men <dbl> 103175, 50436, 1534, 17793, 35209, 5846,…
## $ women <dbl> 101516, 54579, 1397, 18188, 37640, 6485,…
## $ hispanic <dbl> 15.9, 10.0, 22.6, 3.5, 14.1, 5.0, 1.6, 5…
## $ white <dbl> 78.0, 74.7, 72.4, 47.7, 9.1, 44.9, 93.4,…
## $ black <dbl> 1.0, 10.0, 0.8, 47.6, 0.5, 48.2, 0.4, 0.…
## $ native <dbl> 2.1, 0.3, 0.0, 0.1, 73.3, 0.1, 2.8, 0.4,…
## $ asian <dbl> 1.2, 2.5, 0.1, 0.1, 0.9, 0.2, 0.0, 0.0, …
## $ pacific <dbl> 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ voting_age_citizen <dbl> 160544, 83353, 1978, 27763, 50540, 9521,…
## $ income <dbl> 41567, 51049, 43813, 41134, 30336, 34557…
## $ income_err <dbl> 796, 1256, 9676, 2932, 1354, 3147, 4852,…
## $ income_per_cap <dbl> 23527, 25741, 23038, 19775, 14077, 21117…
## $ income_per_cap_err <dbl> 630, 654, 1831, 1047, 634, 2590, 3351, 2…
## $ poverty <dbl> 18.6, 13.3, 8.0, 17.8, 37.5, 24.1, 7.3, …
## $ child_poverty <dbl> 27.2, 21.5, 7.9, 26.6, 45.7, 46.6, 7.1, …
## $ professional <dbl> 24.3, 31.6, 29.5, 23.2, 29.4, 15.8, 35.2…
## $ service <dbl> 25.4, 19.3, 15.5, 14.3, 25.1, 23.0, 14.6…
## $ office <dbl> 26.8, 31.9, 17.3, 24.9, 22.0, 23.6, 15.7…
## $ construction <dbl> 10.9, 9.4, 32.5, 12.6, 9.7, 12.3, 21.8, …
## $ production <dbl> 12.7, 7.8, 5.3, 25.1, 13.8, 25.3, 12.8, …
## $ drive <dbl> 79.9, 83.5, 71.3, 86.1, 74.8, 81.2, 73.2…
## $ carpool <dbl> 12.6, 6.6, 13.6, 5.4, 9.7, 13.1, 10.4, 1…
## $ transit <dbl> 0.9, 0.3, 0.0, 0.4, 0.6, 0.0, 0.0, 0.0, …
## $ walk <dbl> 1.5, 0.3, 3.9, 1.4, 3.7, 1.6, 8.0, 4.2, …
## $ other_transp <dbl> 2.1, 1.7, 2.0, 0.8, 1.6, 2.3, 0.6, 0.6, …
## $ work_at_home <dbl> 3.1, 7.7, 9.3, 6.0, 9.5, 1.8, 7.8, 7.9, …
## $ mean_commute <dbl> 20.4, 25.7, 17.7, 28.2, 22.1, 25.1, 23.9…
## $ employed <dbl> 69774, 39278, 1275, 14304, 23207, 4548, …
## $ private_work <dbl> 78.6, 81.5, 69.6, 80.3, 56.9, 78.4, 71.1…
## $ public_work <dbl> 14.1, 11.8, 20.5, 11.9, 35.7, 15.0, 17.4…
## $ self_employed <dbl> 7.1, 6.6, 9.8, 7.7, 7.4, 6.6, 11.0, 14.4…
## $ family_work <dbl> 0.2, 0.1, 0.0, 0.1, 0.0, 0.0, 0.4, 0.1, …
## $ unemployment <dbl> 10.1, 6.5, 8.7, 6.2, 16.1, 10.4, 2.6, 3.…
## $ winner <chr> "Trump", "Trump", "Trump", "Trump", "Bid…
## $ winner16 <chr> "Trump", "Trump", "Trump", "Clinton", "C…
## $ geometry <POINT [°]> POINT (-113.7578 35.70472), POINT …
library("openxlsx")
# Write the first data set in a new workbook
write.xlsx(circles_data, 'electcion.xlsx')
Land doesn’t vote, people do.
ggplot() +
geom_sf(data = data %>% filter(state_abbv != "AK", state_abbv != "HI"), fill = "#F5F5F5", colour = "grey70") +
geom_sf(data = circles_data_sf, aes(colour = winner, size = total_pop),key_glyph = "rect") +
scale_colour_manual(values = c("#2E74C0","#CB454A"), na.translate = F) +
scale_size_area(max_size = 7) + #scale proportional to population
coord_sf(datum = NA) +
theme_minimal() +
theme(plot.title = element_text(face = "bold"),
legend.position = "top",
text = element_text(family = "Courier")) +
labs(title = "Land doesn't vote - people do",
subtitle = "County winners scaled to population size") +
guides(color=guide_legend(NULL), size = FALSE)
## Swing States Data
Identify Swing States
colour_scale<- c("grey","#8A0303")
fill_scale<- c("white","#8A0303")
identify_swing<- data %>%
# Create abbreviations in data
mutate(swing = ifelse(state_name %in% swing_states, TRUE, FALSE),
name_swing = ifelse(swing == TRUE, state_abbv, NA)) %>%
# Group by
group_by(state_name, swing, name_swing) %>%
summarise(long = max(long),
lat = max(lat)) %>%
# Plot
ggplot() +
# Geom SF
geom_sf(aes(fill = swing, colour=swing)) +
# Fill Scale
scale_fill_manual(values = fill_scale) +
# Colour Scale
scale_colour_manual(values = colour_scale)+
# Geom SF Text
geom_sf_text(aes(label = name_swing),
colour="white",
family = "Courier",
face= "bold",
size=5) +
# Remove Coordinates
coord_sf(datum = NA) +
# Theme Void
theme_void() +
# Add Labels
labs(
title = "Which are the American Swing States?",
subtitle = "Where the election actually happened") +
# Theme Settings
theme(
legend.position = "none",
text = element_text(family= "Courier",size=15),
plot.title = element_text(face="bold")
) +
NULL
identify_swing
How many electoral votes do Swing States have?
plot_votes<- electoral_votes %>%
filter(state %in% swing_states)%>%
arrange(votes) %>% mutate(
cumulative = cumsum(votes)
) %>%
ggplot() +
geom_col(aes(x = reorder(state, cumulative), y = votes), fill="steelblue") +
geom_line(aes(x = reorder(state, cumulative), group=1, y = cumulative)) +
geom_point(aes(x =reorder(state, cumulative), y=cumulative)) +
# Add arrow
geom_segment(aes(x = 12, y = 194, xend = 8, yend = 193),
arrow = arrow(length = unit(0.5, "cm")), colour="#8A0303") +
# Annotate
annotate("text", x = 7.5, y = 193, label = '194', size = 5, angle = 0, fontface = "bold") +
# Labels
labs(title = "Swing States add up to 194 electoral votes",
subtitle = "Cumulative increase of electoral votes in Swing States",
x = "Swing States",
y = "Electoral Votes") +
# Theme settings
theme_minimal() +
theme(axis.text.x = element_text(angle = 20),
text = element_text(family="Courier", size=15),
plot.title = element_text(face="bold"))
plot_votes
Swing States - Outcome in 2016
electoral_mapping %>%
mutate(colour = replace_na(colour, "none")) %>%
ggplot() +
geom_sf(aes(fill=winner_by16, colour=winner_by16))+
scale_fill_manual(values = c("#2E74C0","#CB454A" ), na.translate=FALSE) +
scale_colour_manual(values = c("#2E74C0","#CB454A"), na.value= "grey80") +
guides(colour = FALSE) +
geom_sf_text(aes(label=ifelse(!is.na(winner_by16),votes," ")), color='white', family="Courier", size=5) +
coord_sf(datum=NA) +
theme_void() +
labs(title = "Trump won 188 electoral votes from Swing States in 2016",
subtitle = "Outcome in Swing States in 2016 Elections",
fill = "Winner") +
theme(text = element_text(family="Courier", size=15),
plot.title = element_text(face="bold")) +
NULL
Swing States Flipped by Biden
electoral_mapping %>%
ggplot() +
geom_sf(aes(fill=was_flipped, colour=was_flipped))+
scale_fill_manual(values = c("white", "#2E74C0")) +
scale_colour_manual(values = c("grey", "#2E74C0")) +
geom_sf_text(aes(label=ifelse(was_flipped==TRUE,state_abbv," ")), colour="white",size=5, family = "Courier") +
#geom_sf_label(aes(label=ifelse(was_flipped==TRUE,electoral_votes," ")),size=3.75, family = "Courier") +
coord_sf(datum=NA) +
theme_void() +
labs(title = "Joe Biden Flipped 5 States in the Elections",
subtitle = "Flipped States from 2016 to 2020") +
theme(legend.position = "none",
text = element_text(family="Courier", size=15),
plot.title = element_text( face="bold")) +
NULL
Swing States Flipped by Trump
electoral_mapping %>%
mutate(trump_flipped = "States Flipped by Trump") %>%
ggplot() +
geom_sf(aes(fill=trump_flipped, colour=trump_flipped))+
scale_fill_manual(values = c("white")) +
scale_colour_manual(values = c("grey")) +
guides(fill = FALSE) +
geom_col(aes(x=0, y=0), fill="#CB454A", show.legend = TRUE) +
coord_sf(datum=NA) +
theme_void() +
labs(title = "Donald Trump was not persuasive enough",
subtitle = "Mr President did not manage to flip any states from 2016 to 2020",
colour = " ")+
theme(text = element_text(family = "Courier", size= 15),
plot.title = element_text(face="bold")) +
NULL
Map - Swing States Got Biden The Presidency
electoral_mapping %>%
mutate(#was_flipped = replace_na(was_flipped,FALSE),
# winner_by = replace_na(winner_by, "none"),
colour = replace_na(colour, "none")) %>%
ggplot() +
geom_sf(aes(fill=winner_by, colour=colour, group=was_flipped), size=.75)+
scale_fill_manual(values = c("#2E74C0","#CB454A" ), na.translate = FALSE) +
scale_colour_manual(values = c("#2E74C0",muted("#fffb0a"),"grey80","#CB454A")) +
geom_sf_text(aes(label=ifelse(was_flipped==TRUE,votes," ")), color='white', family="Courier", size=5) +
coord_sf(datum=NA) +
theme_void() +
#theme(legend.position = "none") +
guides(colour = FALSE)+
labs(title = "The Flipped Swing States Got Biden The Presidency",
subtitle="Flipped Swing States Highlighted Made The Difference in 2020",
fill = "Winner"
) +
theme(text = element_text(family="Courier",size=15),
plot.title = element_text(face="bold")) +
NULL
Bar Plot - Swing States Got Biden The Presidency
# final electoral vote battle
flipped<-c("Arizona","Wisconsin","Michigan","Pennsylvania","Georgia")
plot_votes_2<- electoral_votes %>%
filter(state %in% flipped)%>%
arrange(votes) %>% mutate(
cumulative = cumsum(votes)
) %>%
ggplot() +
geom_col(aes(x = reorder(state, cumulative), y = votes), fill="steelblue") +
geom_line(aes(x = reorder(state, cumulative), group=1, y = cumulative)) +
geom_point(aes(x =reorder(state, cumulative), y=cumulative)) +
# Add arrow
geom_segment(aes(x = 5, y = 20, xend = 5, yend = 73),
arrow = arrow(length = unit(0.5, "cm")), colour="#8A0303") +
# Arrow heading down
geom_segment(aes(x = 5, y = 73, xend = 5, yend = 20),
arrow = arrow(length = unit(0.5, "cm")), colour="#8A0303") +
# Annotate
annotate("text", x = 4.5, y = 50, label = '73 Votes', size = 13, angle = 0, fontface = "bold") +
# Labels
# geom_text_repel(aes(x = states, y = votes, label=votes)) +
labs(title = "The Swing States Got Biden The Presidency",
subtitle = "Cumulative increase of electoral votes in Swing States flipped",
x = "Swing States Flipped",
y = "Electoral Votes") +
# Theme settings
theme_minimal() +
theme(axis.text.x = element_text(angle = 20),
text = element_text(family="Courier", size=20),
plot.title = element_text(face="bold"))
plot_votes_2
We will now analyze the demographics of the data.
Biden wins in Counties with more people.
population_data<- data %>%
filter(total_pop<2500000)
plot<- ggplot(population_data,aes(x=total_pop,y=percentage20_joe_biden))+
geom_point(alpha=0.5,colour='blue')+
xlab('Total population of county') +
ylab('Percentage voted for Joe Biden')+
labs(title="Biden wins in counties with more people!",
subtitle ="Percentage votes to Biden by population in county") +
theme_minimal()+
theme(text = element_text(family="Courier"))
#theme(plot.title = element_text(face = "bold"))
#ggtitle("Biden wins in counties with more people!")
ggplotly(plot) %>%
layout(title = list(text = paste0('<b>','Biden wins in counties with more people!','<b>',
'<br>',
'<sup>',
'Percentage votes to Biden by population in county',
'</sup>')))
Did the rich vote for Trump?
plot_1<- ggplot(data,aes(x=income_per_cap,y=percentage20_joe_biden,color=unemployment))+
geom_point(alpha=0.8)+
scale_colour_gradientn(colours = terrain.colors(30))+
xlab('Income per capita for county') +
ylab('Percentage voted for Joe Biden')+
theme_minimal()+
theme(text = element_text(family="Courier"))+
ggtitle("Did the rich vote for Trump? Not really...")
ggplotly(plot_1) %>%
layout(title = list(text = paste0('<b>','Did the rich vote for Trump? Not really...','<b>',
'<br>',
'<sup>',
'Percentage votes to Biden by income per capita and unemployment ',
'</sup>')))
White people vote more for Trump?
g<-pivot_longer(data,c(black,white),names_to="race",values_to = "percentage")
#plot_2<- ggplot(g,aes(x=percentage,y=percentage20_donald_trump,colour=race))+
plot_2<- ggplot(g,aes(x=percentage,y=percentage20_donald_trump,colour=race))+
theme_minimal()+
geom_point(alpha=0.5)+
xlab('percentage of race in the county') +
ylab('Percentage voted for Donald Trump')+
ggtitle("White people vote more for Trump?")+
theme(text = element_text(family="Courier"))+
scale_color_manual(breaks = c("black", "white"),
values=c("blue", "red"))
ggplotly(plot_2) %>%
layout(title = list(text = paste0('<b>','White people vote more for Trump?','<b>',
'<br>',
'<sup>',
'Percentage votes to Trump split by race',
'</sup>')))
#plot_2
But who did actually vote for trump?
#z<-pivot_longer(data,c(self_employed,family_work),names_to="profession_type",values_to = "percentage_prof")
plot_3<-
ggplot(data,aes(x=self_employed,y=percentage20_donald_trump))+
theme_minimal()+
geom_point(alpha=0.5,colour='red')+
#facet_wrap(~ profession_type) +
xlab('percentage of self employed people in the county') +
ylab('Percentage voted for Donald Trump')+
ggtitle("But who did actually vote for Trump?")+
theme(text = element_text(family="Courier"))
ggplotly(plot_3) %>%
layout(title = list(text = paste0('<b>','But who did actually vote for Trump?','<b>',
'<br>',
'<sup>',
'Percentage votes to Trump by self employed percentage in county',
'</sup>')))
We will import 2 new datasets.
turnout_2016 <- read.csv("data/turnout_2016.csv") %>%
clean_names()
turnout_2020 <- read.csv("data/turnout_2020.csv") %>%
clean_names()
#join two dataset
data_turnout <- turnout_2016 %>%
left_join(turnout_2020, by = c("state" = "state")) %>%
rename(turnout_2016 = turnout_rate.x, turnout_2020 = turnout_rate.y) %>%
select(state, turnout_2016, turnout_2020)
glimpse(data_turnout)
## Rows: 52
## Columns: 3
## $ state <chr> "Minnesota", "New Hampshire", "Maine", "Iowa", "Wisconsi…
## $ turnout_2016 <dbl> 74.16, 70.31, 69.92, 68.56, 68.33, 67.86, 66.85, 65.61, …
## $ turnout_2020 <dbl> 79.9, 75.5, 76.3, 73.2, 75.8, 76.4, 75.5, 71.7, 73.0, 72…
Highest Turnout Rate Ever
library(extrafont)
extrafont::loadfonts(device="pdf")
turnout_1920_2020 <- read.csv("data/turnout_1920_2020.csv") %>%
clean_names()
highlight <- turnout_1920_2020[turnout_1920_2020$year == 2020, ]
turnout_1920_2020 %>%
ggplot(aes(x=year, y=united_states_presidential_vep_turnout_rate))+
geom_line(size = 1, color = "#2c7fb8") +
geom_point(color = "#2c7fb8") +
geom_label(data = highlight,
aes(label = "66.7%"),
color = "#2c7fb8",
box.padding = 0.25,
point.padding = 0.5,
vjust = -0.5,
hjust = 0.6,
family = "Courier")+
theme_minimal() +
labs(title = "The U.S. experienced highest turnout rate in over a century",
subtitle = "1920-2020 Voting eligible population turnout rates",
x = "Election Year",
y = "Turnout Rate %") +
# geom_vline(xintercept = 2020, color = "red", linetype = 5) +
# gghighlight::gghighlight(year == 2020, label_key = united_states_presidential_vep_turnout_rate) +
theme_minimal() +
scale_x_continuous(n.break = 10) +
theme(panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
plot.title = element_text(face = "bold", size = 12),
plot.subtitle = element_text(size = 10),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
text = element_text(family="Courier")) +
expand_limits(y = c(40, 80))
In 2020 US election, More Americans voted than in any other in over 100 years. 66.7% percent of the voting-eligible population cast a ballot, delivering the popular vote and electoral college to Joe Biden, the Democratic candidate. Despite that the pandemic introduced a number of complications to voting day, early voting and mail-in ballots brought a record turnout for some states.
swing_states <- c("Arizona","Texas","Florida","Georgia","Pennsylvania","Ohio","Wisconsin","North Carolina","South Carolina","Iowa","Nevada","Michigan")
swing_win <- c( "#2E74C0", "#CB454A", "#CB454A", "#2E74C0", "#2E74C0", "#CB454A", "#2E74C0","#CB454A", "#CB454A", "#CB454A", "#2E74C0", "#2E74C0")
swing_colors <- data.frame(swing_states, swing_win)
swing_colors1 <- swing_colors$swing_win
names(swing_colors1) <- swing_colors$swing_states
data_turnout1 <- data_turnout %>%
filter(state %in% swing_states) %>%
pivot_longer(!state, names_to = "turnout_rate", values_to = "value")
glimpse(data_turnout1)
## Rows: 24
## Columns: 3
## $ state <chr> "Iowa", "Iowa", "Wisconsin", "Wisconsin", "Florida", "Fl…
## $ turnout_rate <chr> "turnout_2016", "turnout_2020", "turnout_2016", "turnout…
## $ value <dbl> 68.56, 73.20, 68.33, 75.80, 65.61, 71.70, 64.59, 73.90, …
data_turnout1$value <- round(data_turnout1$value,1)
ggplot(data=data_turnout1,aes(x = turnout_rate, y = value, group = state)) +
geom_line(size = 1)+
geom_point() +
geom_text(aes(x = turnout_rate, y = value, label = value), vjust = 2, size = 3, family = "Courier")+
facet_wrap(~state) +
scale_y_continuous(limits = c(40, 80)) +
aes(color = state) +
scale_color_manual(values = swing_colors1) +
theme_bw()+
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold", size = 15),
legend.position = "none",
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
strip.text.x = element_text(size = 9, face = "bold"),
text = element_text(family="Courier"))+
scale_x_discrete(labels=c("turnout_2016" = "2016", "turnout_2020" = "2020")) +
labs(title = "Turnout on the rise in key battlegrounds",
subtitle = "From 2016 to 2020 presidential election")
Most of states saw an increase in voter turnout since 2016, with key battleground states like Florida, Michigan, Wisconsin and Pennsylvania seeing participation well above the national rate.
states_sf <- get_urbn_map("states", sf = TRUE)
data_state <- states_sf %>%
left_join(data_turnout, by = c("state_name" = "state"))
glimpse(data_state)
## Rows: 51
## Columns: 6
## $ state_fips <chr> "01", "04", "08", "09", "12", "13", "16", "18", "20", "2…
## $ state_abbv <chr> "AL", "AZ", "CO", "CT", "FL", "GA", "ID", "IN", "KS", "L…
## $ state_name <chr> "Alabama", "Arizona", "Colorado", "Connecticut", "Florid…
## $ turnout_2016 <dbl> 58.65, 56.35, 67.86, 62.59, 65.61, 60.00, 59.68, 56.18, …
## $ turnout_2020 <dbl> 63.1, 65.9, 76.4, 71.1, 71.7, 67.7, 67.7, 61.4, 64.2, 64…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((1150023 -15..., MULTIPOLYGO…
data_state %>%
ggplot(aes()) +
geom_sf(aes(fill = turnout_2020), colour = "#ffffff")+
theme_void() +
labs(title = "Turnout rate in different states",
subtitle = "2020 Election",
fill = "turnout rate") +
geom_sf_text(aes(label = state_abbv), color = "white", size = 2, family = "Courier") +
theme(legend.position = "bottom",
plot.title = element_text(face = "bold", size = 14),
text = element_text(family="Courier")) +
scale_fill_gradient(low = "#1a9641",
high = "#d7191c")
## Top states with highest turnout rates
data_turnout_15 <- data_turnout %>%
arrange(desc(turnout_2020)) %>%
top_n(n=15, wt = turnout_2020)
glimpse(data_turnout_15)
## Rows: 15
## Columns: 3
## $ state <chr> "Minnesota", "Colorado", "Maine", "Wisconsin", "Washingt…
## $ turnout_2016 <dbl> 74.16, 67.86, 69.92, 68.33, 62.46, 70.31, 66.85, 64.66, …
## $ turnout_2020 <dbl> 79.9, 76.4, 76.3, 75.8, 75.7, 75.5, 75.5, 74.2, 73.9, 73…
data_turnout_15$state
## [1] "Minnesota" "Colorado" "Maine" "Wisconsin"
## [5] "Washington" "New Hampshire" "Oregon" "Vermont"
## [9] "Michigan" "Iowa" "Montana" "Virginia"
## [13] "Massachusetts" "Florida" "North Carolina"
top_states <- c("Minnesota","Colorado","Maine","Wisconsin","Washington","New Hampshire","Oregon","Vermont","Michigan","Iowa","Montana","Virginia","Massachusetts","Florida","North Carolina")
top_win <- c( "#2E74C0", "#2E74C0","#2E74C0", "#2E74C0", "#2E74C0", "#2E74C0", "#2E74C0", "#2E74C0", "#2E74C0", "#CB454A", "#CB454A", "#2E74C0", "#2E74C0", "#CB454A","#CB454A")
top_colors <- data.frame(top_states, top_win)
top_colors1 <- top_colors$top_win
names(top_colors1) <- top_colors$top_states
ggplot(data = data_turnout_15, aes(x=reorder(state, turnout_2020), y = turnout_2020)) +
geom_point(size = 2) +
coord_flip() +
theme_minimal() +
expand_limits(y = c(70, 80)) +
labs(title = "Biden won states with high turnout rate",
subtitle = "Top 15 states with highest turnout rates in 2020",
y = "turnout rate")+
theme(legend.position = "none",
axis.title.y = element_blank(),
plot.title = element_text(face = "bold", size = 16),
text = element_text(family="Courier")) +
geom_text(data=data_turnout_15, aes(x=state, y=turnout_2020, label = turnout_2020), vjust = 0.2, hjust = 1.1, color = "white") +
aes(color = state) +
scale_color_manual(values = top_colors1)
Among all the states, Minnesota, Colorado, Maine and Wisconsin have the highest turnout rate, and Biden won the majority of votes in those states with high voter turnout rate.
##Correlation Create correlation between Joe Biden and other demographic characteristics
data1<-data
data_turnout_new<-data_turnout %>%
filter(state!="United States") %>%
rename(X.x=turnout_2016, X.y=turnout_2020) %>%
mutate(difference= (X.y-X.x)/X.x *100)
covid_data<-data1 %>%
group_by(state_name) %>%
summarise(cases= sum(cases),population=sum(total_pop), lat=round(mean(lat), 0), long =round(mean(long), 0)) %>%
mutate(cases_per_1M=round(cases*1000000/population,0))
covid_data<-st_drop_geometry(covid_data)
states <-get_urbn_map("states", sf = TRUE)
turnout_and_covid <- states %>%
left_join(covid_data, by = "state_name") %>%
left_join(data_turnout_new, by= c("state_name"= "state"))
#deactivate geomtery to mutate columns
st_geometry(circles_data_sf) <- NULL
#Create corrlation matrix
correlation <-as.data.frame(cor(circles_data_sf[, 10:54],
method = "pearson",
use = "complete.obs")) %>%
#Rename empty columns
mutate(names = rownames(.)) %>%
#Arrange in descending order
arrange(desc(percentage20_joe_biden)) %>%
#Split into categories -Ethnicity, Economy, Job divisions and the rest (0)
mutate(number= ifelse(names %in% c("black", "asian", "pacific", "native", "hispanic","white"),1, ifelse(names %in% c("total_pop", "employed", "income_per_cap", "poverty"),2, ifelse(names %in% c("professional", "service", "office", "family_work", "production", "self_employed", "construction"), 3, 0)))) %>%
filter(number !=0) %>%
arrange(percentage20_joe_biden) %>%
arrange(desc(number))%>%
mutate(no=row_number())
#Create 3 different datafremes for next plots
correlation1<-correlation %>%
filter(number ==1)
correlation2<-correlation %>%
filter(number ==2)
correlation3<-correlation %>%
filter(number ==3)
#Create first plot (for Ethnicity), reordered by correlation with Joe Biden
cor1<-ggplot(correlation1, aes(x=reorder(names, no),
y=percentage20_joe_biden,
label=round(percentage20_joe_biden,2))) +
#Add column names instead of numerical value
geom_point(stat='identity', aes(col=percentage20_joe_biden), size=9) +
#Add gradient color in the bubble
scale_color_gradient2("",
low = muted("#CB454A"),
mid = "white",
high = muted("#2E74C0"),
midpoint=0, breaks=c(-0.5,0,0.45),
labels=c("More Trump",0,"More Biden"))+
geom_text(color="black", size=2) +
#Show correlations between these values
ylim(-0.6, 0.6) +
#Change theme
theme_minimal()+
#Remove x axis, change size of text and font
theme(panel.grid.major.x= element_blank(),
panel.grid.minor.x = element_blank(),
panel.background = element_blank(),
axis.ticks=element_blank(),
text=element_text(size=12, family="Courier"),
plot.title = element_text(color=muted("#CB454A"), face = "bold"))+
labs(title="Blacks & Asians Voted J.Biden, People Working in Construction or Production Did Not", subtitle= "Correlation between Demographics and Votes for J.Biden",
x="Ethnicity",
y= "",
fill="")+
#Change sizes of text
theme(plot.title = element_text(size=13),
plot.subtitle=element_text(size=12.5),
plot.caption = element_text(size=6))+
#Flip the coordinates
coord_flip()
#Create second plot for Economy
cor2<-ggplot(correlation2, aes(x=reorder(names, no),
y=percentage20_joe_biden,
label=round(percentage20_joe_biden,2))) +
#Use stat=identity to show names
geom_point(stat='identity', aes(col=percentage20_joe_biden), size=9) +
scale_color_gradient2("",low = muted("#CB454A"),
mid = "white",
high = muted("#2E74C0"),
midpoint=0,
breaks=c(-0.5,0,0.45),
labels=c("More Trump",0,"More Biden"))+
geom_text(color="black", size=2) +
ylim(-0.6, 0.6) +
#Flip coordinates
coord_flip()+
theme_minimal()+
#Change theme
theme(panel.grid.major.x= element_blank(),
panel.grid.minor.x = element_blank(),
panel.background = element_blank(),
axis.ticks=element_blank(),
text=element_text(size=12, family="Courier"))+
#Add the category
labs(y=NULL, x="Economy", fill="")
#Create third plot for Job division
cor3<-ggplot(correlation3,
aes(x=reorder(names, no),
y=percentage20_joe_biden,
label=round(percentage20_joe_biden,2))) +
geom_point(stat='identity', aes(col=percentage20_joe_biden), size=9) +
#Add diverging color from red for Trump to blue for Biden
scale_color_gradient2("",low = muted("#CB454A"),
mid = "white",
high = muted("#2E74C0"),
midpoint=0, breaks=c(-0.5,0,0.45),
labels=c("More Trump",0,"More Biden"))+
geom_text(color="black", size=2) +
ylim(-0.6, 0.6) +
coord_flip()+
theme_minimal()+
#Change theme
theme(panel.grid.major.x= element_blank(),
panel.grid.minor.x = element_blank(),
panel.background = element_blank(),
axis.ticks=element_blank(),
text=element_text(size=12, family="Courier"))+
#Put x axis (but it will be flipped)
labs(y="Correlation with % Votes Joe Biden", x="Job division", fill="")
#Combine plots
library(ggpubr)
#Arrange the plots on top of each other
ggarrange(cor1, cor2,cor3, ncol=1, nrow=3, align="hv", common.legend = TRUE, legend= "right", widths = c(15,1))
Plot correlation between covid rates and turnout rate
turnout_and_covid%>%
ggplot(aes(x=cases_per_1M,y= X.y)) +
geom_point(aes(label=state_abbv))+
#Add line
geom_smooth(method= "lm", se=FALSE, color=muted("#CB454A"))+
#Change theme
theme_minimal()+
#Add lable titles
labs(x="Coronavirus Cases per 1M",
y= "Turnout Rate per State",
title= "More Covid less Votes!",
subtitle="States' Turnout Rates and Level of Covid per 1 Million People")+
#Change font and size
theme(text=element_text(size=10, family="Courier"),
plot.title = element_text(color=muted("#CB454A"), face = "bold"))